* The Australian data (file named EG1EG2_RELIG.dta) is only available by data sharing agreement with Peter Hatemi and the Queensland Institute of Medical Research
* To access the data, contact him at phatemi@gmail.com

*** Set working directory
* The path is left blank here; set it to the folder holding EG1EG2_RELIG.dta
cd ""

* Load data, discarding whatever dataset is currently in memory
use EG1EG2_RELIG.dta, clear



/* List of variables needed
Social ideology (no religion)
Religiosity
Big Five and Facets
	Agreeableness
	Agreeableness (Altruism)
	Agreeableness (Compliance)
	Conscientiousness
	Conscientiousness (Order)
	Conscientiousness (Self-discipline)
	Extraversion
	Extraversion (Activity)
	Extraversion (Assertiveness)
	Neuroticism
	Neuroticism (Anxiety)
	Neuroticism (Depression)
	Openness
	Openness (Aesthetics)
	Openness (Ideas)
Income
Education
Age
Sex
*/


*** Social ideology
* Wave 1 items: recode in place so that 2 becomes 0 and 3 becomes -1
* (1 is left untouched, giving codes -1, 0, 1)
recode Q17_01-Q17_30 (2=0) (3=-1)

* Every variable whose name ends in _5pt: swap codes 1 and 3
recode *_5pt (3=1) (1=3)

* Retest items: apply the same 2->0, 3->-1 recode, then build retest_wp_##
* as the item times its _5pt companion, set to 0 wherever the recoded item is 0
foreach num of numlist 1 2 4 6/9 11 12 14 15 18 20/30 {

	* zero-pad the item number to two digits to match the variable names
	local item : display %02.0f `num'

	recode RETEST_Q17_`item' (2=0) (3=-1)
	* capture lets the script continue if this gen fails (e.g. variable already exists)
	capture gen retest_wp_`item' = RETEST_Q17_`item' * RETEST_Q17_`item'_5pt
	replace retest_wp_`item' = 0 if RETEST_Q17_`item'==0

}

/*
Social questions
2 X rated movies
6 Legalise marijuana
7 Legalise abortion
8 Medicare subsidised abortion
9 Gay marriage
15 stem cell research
18 Living together, not married
20 Women in combat
23 Evolution
24 Euthanasia
*/

* Social ideology scale at wave 1 and at the retest.
* alpha stores the scale score in the gen() variable; casewise excludes
* respondents with a missing value on any of the ten listed items.
alpha Q17_02 Q17_06 Q17_07 Q17_08 Q17_09 ///
	Q17_15 Q17_18 Q17_20 Q17_23 Q17_24, gen(wp_social) casewise

alpha RETEST_Q17_02 RETEST_Q17_06 RETEST_Q17_07 RETEST_Q17_08 RETEST_Q17_09 ///
	RETEST_Q17_15 RETEST_Q17_18 RETEST_Q17_20 RETEST_Q17_23 RETEST_Q17_24, gen(retest_wp_social) casewise

* Combined score: the wave 1 score, or the retest score when wave 1 is missing
gen combined_wp_social = cond(wp_social==., retest_wp_social, wp_social)

* For each scale: reverse the sign so that conservative values are higher,
* then shift the minimum to 0 and divide by the resulting maximum (0-1 range)
foreach scale of varlist wp_social retest_wp_social combined_wp_social {
	replace `scale' = `scale' * -1
	qui sum `scale'
	replace `scale' = `scale' - r(min)
	qui sum `scale'
	local top = r(max)
	replace `scale' = `scale' / `top'
}

rename retest_wp_social wp_social_retest



* Relig - 18, 32, 33, 34, 35
* Each item gets a wave 1 version, a retest version, and a combined version
* that uses the retest answer only when the wave 1 answer is missing.

* Q18_02 - importance of religion in life
rename Q18_02 imp_rel
gen imp_rel_retest = RETEST_Q18_02
gen combined_imp_rel = cond(imp_rel==., imp_rel_retest, imp_rel)

* 32 - attendance
* 7 (prefer not to answer) becomes missing; codes 1-6 are reversed
recode Q32 (7=.)
gen attend = Q32
gen attend_retest = RETEST_Q32
recode attend        (1=6) (2=5) (3=4) (4=3) (5=2) (6=1)
recode attend_retest (1=6) (2=5) (3=4) (4=3) (5=2) (6=1) (7=.)
gen combined_attend = cond(attend==., attend_retest, attend)

* 33 - pray before meals
* codes 1-5 are reversed onto 4-0
gen praymeals = Q33
gen praymeals_retest = RETEST_Q33
recode praymeals praymeals_retest (5=0) (4=1) (3=2) (2=3) (1=4)
gen combined_praymeals = cond(praymeals==., praymeals_retest, praymeals)

* 34_01 - believe in heaven/hell
* code 2 becomes 0
gen heavenhell = Q34_01
gen heavenhell_retest = RETEST_Q34_01
recode heavenhell heavenhell_retest (2=0)
gen combined_heavenhell = cond(heavenhell==., heavenhell_retest, heavenhell)

* Religion factor analysis
local relig_items combined_imp_rel combined_attend combined_praymeals combined_heavenhell

alpha `relig_items'

* single principal-component factor; predict stores its score
factor `relig_items', pcf factors(1)
predict combined_rel_factor

* Rescale the factor score: subtract the minimum, then divide by the new maximum
qui sum combined_rel_factor
local lo = r(min)
replace combined_rel_factor = combined_rel_factor - `lo'
qui sum combined_rel_factor
local hi = r(max)
replace combined_rel_factor = combined_rel_factor/`hi'

* US equivalent measures
* mean of attendance (1-6 mapped to 0-1) and importance of religion
* (presumably coded 1-4, given the division by 3 -- confirm against the codebook)
gen relig_combined_US = ((combined_attend-1)/5 + (combined_imp_rel - 1)/3) /2


* Descriptive stats on religious denomination
* 35 - religious affiliation
** Think about how to combine these for people that move between categories
** between test and retest

gen religaff = Q35

gen religaff_retest = RETEST_Q35

* Combined affiliation: start from Wave 1, then fall back to Wave 2 when Wave 1
* is missing, is "Prefer not to answer" (9), or is "Other" (6)
gen combined_religaff = religaff
replace combined_religaff = religaff_retest if combined_religaff==.
replace combined_religaff = religaff_retest if combined_religaff==9 & religaff_retest!=. /* replacing if chose "Prefer not to answer" in Wave 1 with Wave 2 response */
replace combined_religaff = religaff_retest if religaff==6 & religaff_retest!=9 & religaff_retest!=.  /* replacing if chose "Other" in Wave 1 with Wave 2 response */

* Denomination indicators (1 = yes, 0 = no).
* A bare (x==1) evaluates to 0 when x is missing, which would classify
* respondents with no affiliation in either wave as non-Catholic and
* non-Protestant. The "if combined_religaff < ." qualifier leaves the
* indicators missing for those respondents instead.
* NOTE(review): a remaining "Prefer not to answer" (9) is still coded 0 -- confirm this is intended
gen catholic = (combined_religaff==1) if combined_religaff < .
gen protestant = (combined_religaff==2 | combined_religaff==3) if combined_religaff < .






*** Big Five
* Traits
* Each trait is the mean of its Q13 items, with reverse-keyed items entered
* as (6 - response), then mapped with (mean - 1)/4 (0-1 if responses run 1-5).
* A missing response on any item makes the trait score missing.

* Items entered as answered (pos_) and reverse-keyed (rev_), per trait
local pos_ex 01 11 16 26 36
local rev_ex 06 21 31

local pos_ag 07 17 22 32 42
local rev_ag 02 12 27 37

local pos_co 03 13 28 33 38
local rev_co 08 18 23 43

local pos_ne 04 14 19 29 39
local rev_ne 09 24 34

local pos_op 05 10 15 20 25 30 40 44
local rev_op 35 41

* Traits are created in the order ex ag co ne op
foreach t in ex ag co ne op {
	gen b5_`t' = 0
	foreach q of local pos_`t' {
		replace b5_`t' = b5_`t' + Q13_`q'
	}
	foreach q of local rev_`t' {
		replace b5_`t' = b5_`t' + (6 - Q13_`q')
	}
	* number of items in this trait (8, 9 or 10)
	local k : word count `pos_`t'' `rev_`t''
	replace b5_`t' = b5_`t' / `k'
	replace b5_`t' = (b5_`t' - 1) / 4
}

label var b5_ex "Big Five - Extraversion"
label var b5_ag "Big Five - Agreeableness"
label var b5_co "Big Five - Conscientiousness"
label var b5_ne "Big Five - Neuroticism"
label var b5_op "Big Five - Openness"

* Facets - coded according to Appendix B of Soto and John 2009 Journal of Research in Personality
* Each facet is the mean of its Q13 items, with reverse-keyed items entered
* as (6 - response), then mapped with (mean - 1)/4.
* Items not assigned to any facet: 36 / 37 42 / 03 33 / 14 24 / 05 20
* NOTE(review): item 31 is reverse-scored into Assertiveness and item 36 is
* excluded; confirm this against Appendix B of the cited paper.

* Items entered as answered (pos_) and reverse-keyed (rev_), per facet
local pos_ex_assertiveness 01 26
local rev_ex_assertiveness 06 21 31
local pos_ex_activity 11 16
local rev_ex_activity

local pos_ag_altruism 07 22 32
local rev_ag_altruism 27
local pos_ag_compliance 17
local rev_ag_compliance 02 12

local pos_co_order
local rev_co_order 08 18
local pos_co_selfdiscipline 13 28 38
local rev_co_selfdiscipline 23 43

local pos_ne_anxiety 19 39
local rev_ne_anxiety 09 34
local pos_ne_depression 04 29
local rev_ne_depression

local pos_op_aesthetics 30 44
local rev_op_aesthetics 41
local pos_op_ideas 10 15 25 40
local rev_op_ideas 35

foreach f in ex_assertiveness ex_activity ag_altruism ag_compliance ///
	co_order co_selfdiscipline ne_anxiety ne_depression ///
	op_aesthetics op_ideas {
	gen b5_`f' = 0
	foreach q of local pos_`f' {
		replace b5_`f' = b5_`f' + Q13_`q'
	}
	foreach q of local rev_`f' {
		replace b5_`f' = b5_`f' + (6 - Q13_`q')
	}
	* number of items in this facet (2 to 5)
	local k : word count `pos_`f'' `rev_`f''
	replace b5_`f' = b5_`f' / `k'
	replace b5_`f' = (b5_`f' - 1) / 4
}


* Big Five TIPI from retest
* Each trait is one item plus (8 - its reverse-keyed partner), giving a sum
* from 2 to 14 on 1-7 items, which (sum - 2)/12 maps onto 0-1.
* Each quoted entry is: trait, item entered as answered, reverse-keyed item.
foreach spec in "op 05 10" "co 03 08" "ex 01 06" "ag 07 02" "ne 04 09" {
	tokenize `spec'
	gen b5_`1'_retest = RETEST_QPE01_`2' + (8 - RETEST_QPE01_`3')
	replace b5_`1'_retest = (b5_`1'_retest - 2)/12
}

* Combined trait score: the wave 1 score, or the retest TIPI score when wave 1 is missing
foreach t in ex ag co ne op {
	gen combined_b5_`t' = cond(b5_`t'==., b5_`t'_retest, b5_`t')
}
/* Note: among same sex twins, ~415 have b5 scores from the longer version in wave 1; about ~150 from TIPI scores in wave 2 */







* Income
* Q52 code 8 is left missing; the other codes are mapped with (code - 1)/6
gen income = (Q52 - 1)/6 if Q52 != 8


* Education
* Q50 codes are mapped with (code - 1)/5
gen education = (Q50 - 1)/5


* Age
rename AGE age


* Sex
* 1 for "M", 0 for "F", missing for any other value of SEX
gen sex = cond(SEX=="M", 1, cond(SEX=="F", 0, .))



* Create zygosity variable
* ZYGOSITY codes 1-2 become 1 (MZ) and codes 3-4 become 2 (DZ); other codes are unchanged
gen mz=ZYGOSITY
recode mz (2=1) (3 4=2)
label define mz 1 "MZ" 2 "DZ"
label val mz mz

** Delineate twin pairs

* Keep only MZ and DZ twins
keep if inlist(ID1, "01", "02")

* Keep only same-sex twins
* i.e. ZYGOSITY between 1 and 4; missing ZYGOSITY is dropped
keep if inrange(ZYGOSITY, 1, 4)

* Identify complete pairs
* pairnumber indexes families; completepair is the number of twins present
* in the pair minus one (1 when both twins are in the data, 0 when only one is)
egen pairnumber=group(FAMID1) if inlist(ID1, "01", "02")

bysort pairnumber: egen completepair=count(pairnumber) if inlist(ID1, "01", "02")
replace completepair=completepair-1

* Numeric twin index (1 or 2) taken from the string ID1
destring ID1, generate(twinnumber)



* Variables carried into the cleaned file, grouped by role
local facets b5_ag_altruism b5_ag_compliance ///
	b5_co_order b5_co_selfdiscipline ///
	b5_ex_activity b5_ex_assertiveness ///
	b5_ne_anxiety b5_ne_depression ///
	b5_op_aesthetics b5_op_ideas
local scales combined_wp_social combined_rel_factor
local traits combined_b5_op combined_b5_co combined_b5_ex combined_b5_ag combined_b5_ne
local controls catholic protestant income education
local trailing age sex mz pairnumber twinnumber

keep `facets' `scales' `traits' `controls' `trailing'

* Alphabetical column order, then move the trailing group to the end
order _all, alpha
order `trailing', last



*** Set directory to save cleaned data
cd ""

save "AU_cleaned.dta", replace


*** Collapse twins onto rows
* Every variable from b5_ag_altruism through age (in current column order)
* gets a trailing underscore so reshape produces name_1 / name_2 columns.
* sex, mz and the pair identifier are not reshaped and stay one per pair.
unab pertwin : b5_ag_altruism-age
local stubs
foreach v of local pertwin {
	rename `v' `v'_
	local stubs `stubs' `v'_
}

reshape wide `stubs', i(pairnumber) j(twinnumber)

save "AU_cleaned_pairs.dta", replace
outsheet using "AU_cleaned_pairs.csv", comma nolabel replace


*** Catholic and Protestant only datasets for supplemental analysis

* Catholic only: pairs in which at least one twin is coded Catholic
preserve
keep if catholic_1==1 | catholic_2==1
outsheet using "AU_cleaned_pairs_Catholic.csv", comma nolabel replace
restore

* Protestant only: pairs in which at least one twin is coded Protestant
preserve
keep if protestant_1==1 | protestant_2==1
outsheet using "AU_cleaned_pairs_Protestant.csv", comma nolabel replace
restore

